CmdStanR needs it’s input data as a list
Code
prepare_data <- function (df = corrosion_data) {
df |>
arrange (anomaly_id, T) |>
group_by (anomaly_id) |>
mutate (
next_depth = lead (measured_depth_mm),
time_diff = lead (T) - T
) |>
filter (! is.na (next_depth)) |>
mutate (
delta_C = (next_depth - measured_depth_mm) / time_diff
) |>
select (anomaly_id, delta_C, soil_type) |>
ungroup ()
}
model_data <- list (
n_anomalies = prepare_data ()$ anomaly_id |> unique () |> length (),
n_inspections = 2 ,
cgr = prepare_data ()$ delta_C
)
cgr_post <- cgr_model$ sample (data = model_data)
Running MCMC with 4 sequential chains...
Chain 1 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 1 Iteration: 100 / 2000 [ 5%] (Warmup)
Chain 1 Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 1 Iteration: 300 / 2000 [ 15%] (Warmup)
Chain 1 Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 1 Iteration: 500 / 2000 [ 25%] (Warmup)
Chain 1 Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 1 Iteration: 700 / 2000 [ 35%] (Warmup)
Chain 1 Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 1 Iteration: 900 / 2000 [ 45%] (Warmup)
Chain 1 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 1 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 1 Iteration: 1100 / 2000 [ 55%] (Sampling)
Chain 1 Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 1 Iteration: 1300 / 2000 [ 65%] (Sampling)
Chain 1 Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 1 Iteration: 1500 / 2000 [ 75%] (Sampling)
Chain 1 Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 1 Iteration: 1700 / 2000 [ 85%] (Sampling)
Chain 1 Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 1 Iteration: 1900 / 2000 [ 95%] (Sampling)
Chain 1 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 1 finished in 0.1 seconds.
Chain 2 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 2 Iteration: 100 / 2000 [ 5%] (Warmup)
Chain 2 Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 2 Iteration: 300 / 2000 [ 15%] (Warmup)
Chain 2 Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 2 Iteration: 500 / 2000 [ 25%] (Warmup)
Chain 2 Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 2 Iteration: 700 / 2000 [ 35%] (Warmup)
Chain 2 Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 2 Iteration: 900 / 2000 [ 45%] (Warmup)
Chain 2 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 2 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 2 Iteration: 1100 / 2000 [ 55%] (Sampling)
Chain 2 Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 2 Iteration: 1300 / 2000 [ 65%] (Sampling)
Chain 2 Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 2 Iteration: 1500 / 2000 [ 75%] (Sampling)
Chain 2 Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 2 Iteration: 1700 / 2000 [ 85%] (Sampling)
Chain 2 Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 2 Iteration: 1900 / 2000 [ 95%] (Sampling)
Chain 2 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 2 finished in 0.1 seconds.
Chain 3 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 3 Iteration: 100 / 2000 [ 5%] (Warmup)
Chain 3 Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 3 Iteration: 300 / 2000 [ 15%] (Warmup)
Chain 3 Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 3 Iteration: 500 / 2000 [ 25%] (Warmup)
Chain 3 Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 3 Iteration: 700 / 2000 [ 35%] (Warmup)
Chain 3 Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 3 Iteration: 900 / 2000 [ 45%] (Warmup)
Chain 3 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 3 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 3 Iteration: 1100 / 2000 [ 55%] (Sampling)
Chain 3 Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 3 Iteration: 1300 / 2000 [ 65%] (Sampling)
Chain 3 Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 3 Iteration: 1500 / 2000 [ 75%] (Sampling)
Chain 3 Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 3 Iteration: 1700 / 2000 [ 85%] (Sampling)
Chain 3 Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 3 Iteration: 1900 / 2000 [ 95%] (Sampling)
Chain 3 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 3 finished in 0.1 seconds.
Chain 4 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 4 Iteration: 100 / 2000 [ 5%] (Warmup)
Chain 4 Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 4 Iteration: 300 / 2000 [ 15%] (Warmup)
Chain 4 Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 4 Iteration: 500 / 2000 [ 25%] (Warmup)
Chain 4 Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 4 Iteration: 700 / 2000 [ 35%] (Warmup)
Chain 4 Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 4 Iteration: 900 / 2000 [ 45%] (Warmup)
Chain 4 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 4 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 4 Iteration: 1100 / 2000 [ 55%] (Sampling)
Chain 4 Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 4 Iteration: 1300 / 2000 [ 65%] (Sampling)
Chain 4 Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 4 Iteration: 1500 / 2000 [ 75%] (Sampling)
Chain 4 Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 4 Iteration: 1700 / 2000 [ 85%] (Sampling)
Chain 4 Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 4 Iteration: 1900 / 2000 [ 95%] (Sampling)
Chain 4 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 4 finished in 0.1 seconds.
All 4 chains finished successfully.
Mean chain execution time: 0.1 seconds.
Total execution time: 0.8 seconds.
CmdStanPy needs it’s input data as a dictionary
Code
def prepare_data(df = corrosion_data):
return (
df.sort(['anomaly_id' , 'T' ])
.group_by('anomaly_id' )
.agg([
pl.col('measured_depth_mm' ).shift(- 1 ).alias('next_depth' ),
pl.col('T' ).shift(- 1 ).alias('next_time' ),
pl.col('measured_depth_mm' ),
pl.col('T' )
])
.filter (pl.col('next_depth' ).is_not_null())
.with_columns([
((pl.col('next_depth' ) - pl.col('measured_depth_mm' )) /
(pl.col('next_time' ) - pl.col('T' ))).alias('delta_C' )
])
.select(['anomaly_id' , 'delta_C' ])
.explode('delta_C' ) # Add this line to unnest the lists
.filter (pl.col('delta_C' ).is_not_null()) # Optional: remove null values if any
)
model_data = {
'n_anomalies' : prepare_data().select('anomaly_id' ).unique().height,
'n_inspections' : 2 ,
'cgr' : prepare_data().select('delta_C' ).to_series().to_numpy()
}
cgr_post = cgr_model.sample(data = model_data)
INFO:cmdstanpy:CmdStan start processing
chain 1 |[33m [0m| 00:00 Status
chain 2 |[33m [0m| 00:00 Status[A
chain 3 |[33m [0m| 00:00 Status[A[A
chain 4 |[33m [0m| 00:00 Status[A[A[A
chain 3 |[34m######3 [0m| 00:00 Iteration: 1100 / 2000 [ 55%] (Sampling)[A[A
chain 1 |[34m#######2 [0m| 00:00 Iteration: 1300 / 2000 [ 65%] (Sampling)
chain 4 |[34m######8 [0m| 00:00 Iteration: 1200 / 2000 [ 60%] (Sampling)[A[A[A
chain 2 |[34m#######2 [0m| 00:00 Iteration: 1300 / 2000 [ 65%] (Sampling)[A
chain 1 |[34m##########[0m| 00:00 Sampling completed
chain 2 |[34m##########[0m| 00:00 Sampling completed
chain 3 |[34m##########[0m| 00:00 Sampling completed
chain 4 |[34m##########[0m| 00:00 Sampling completed
INFO:cmdstanpy:CmdStan done processing.
A Turing model needs it’s input data as arguments to the model function
Code
function prepare_data (df:: DataFrame = corrosion_data)
sorted_df = sort (df, [: anomaly_id, : T]); result = DataFrame ()
for group in groupby (sorted_df, : anomaly_id)
if nrow (group) > 1
for i in 1 : (nrow (group)- 1 )
Δc = (group[i+ 1 , : measured_depth_mm] - group[i, : measured_depth_mm]) / (group[i+ 1 , : T] - group[i, : T])
push! (result, (
anomaly_id = group[i, : anomaly_id], Δc = max (0 , Δc)
))
end
end
end
return result
end
cgr_post = prepare_data ().Δc |>
data -> corrosion_growth (data) |>
model -> sample (model, NUTS (), 1_000 )